Packages

library(knitr)
library(kableExtra)
library(tidyverse)
library(Hmisc)
library(LSAfun)
library(ez)
library(broom)
library(ngram)
library(RColorBrewer)

Custom Functions

# Function that simplifies the printing of tables
matt_kable <- function(x){
  
  kable(x) %>%
    kable_styling(bootstrap_options = c('striped', 'hover', 'responsive'))
  
}

Plotting Tools

pd <- position_dodge(width = .1)
pj <- position_jitter(width = .1)

pu_pal <- brewer.pal(9, "Purples") # display.brewer.pal(9, "Purples")

Data

This chunk will load various data frames prepared from preprocessing scripts.

load("EN_100k.rda") # Loads LSA semantic space

# These data were created by running: source("dod-prepro.R")
load("dod-pre-data.RData")          # From DOD study
load("dod-hayling-tbi-trim.RData")  # From DOD study
load("dod-hayling-ctl-trim.RData")  # From DOD study

# These data were created by running: source("control-prepro.R")
load("control-hsct-data.RData")   # From HSCT Control study
load("control-iq-data.RData")     # From HSCT Control study
load("control-demo-data.RData")   # From HSCT Control study

# SUBTLEX corpus
subtlus <- read_delim('SUBTLEXus74286wordstextversion.txt', delim = '\t')

Step 1

Seeing if groups are balanced:

# DOD
# This pipeline prepares the DOD neuropsych data to match the HSCT control study
dod_neuropsych <- dod_pre %>%
  select(DODID = dodID, 
         group = participantType, 
         yrEd, 
         sex, 
         age, 
         audit, 
         bdi,
         sc.time.raw = sec1RawScore,
         sc.time.scaled = sec1SS,
         uc.time.raw = sec2RawScore,
         uc.time.scaled = sec2SS,
         cat.a.errors = catAErrors,
         a.score = aScore,
         cat.b.errors = catBErrors,
         b.score = bScore,
         ab.score = abScore,
         errors.scaled = sec2ErrorsSS,
         total.scaled = abcSS,
         hsct.scaled = overallSS,
         wasi.vocab.rscore = vocabRawScore,
         wasi.vocab.tscore = vocabTScore,
         wasi.similarities.rscore = similaritiesRawScore,
         wasi.similarities.tscore = similaritiesTScore,
         wasi.matrix.rscore = matrixRawScore,
         wasi.matrix.tscore = matrixTScore,
         # wasi.viq = ,    Not available in DOD data, but is possible to obtain
         wasi.full2IQ = wasiIQ,
         wtar.rscore = wtarRawScore,
         wtar.sscore = wtarSS
         ) %>%
  rowwise() %>%
  mutate(wasi.tverbal = sum(wasi.vocab.tscore, wasi.similarities.tscore),
         wasi.full2subtest = sum(wasi.vocab.tscore, wasi.matrix.tscore),
         audit = as.numeric(audit)
         ) %>%
  ungroup() %>%
  filter(complete.cases(hsct.scaled)) %>% # removes ss if they don't have HSCT
  # removes if they don't have HSCT words
  filter(DODID %in% c(dod_hayling_tbi_trim$DODID, dod_hayling_ctl_trim$DODID)) %>%
  # The following participants need to be removed bc of the following:
  #   DOD-0276 : brain tumor
  #   DOD-0381 : has Erb's Palsy and embolisms due to diving accident
  #   DOD-1114 : has bipolar and depression
  #   DOD-5359 : did not have a TBI, but had LOC due to lack of oxygen
  filter(DODID %nin% c("DOD-0276", "DOD-0381", "DOD-1114", "DOD-5359"))

# HSCT controls
# This pipeline prepares the HSCT control data to match the DOD neuropsych data
hsct_neuropsych <- control_iq_data %>% 
  inner_join(., control_demo_data %>% 
               mutate(ssID = as.numeric(ID)), 
             by = "ssID"
             ) %>%
  inner_join(., control_hsct_data, by = "ssID") %>%
  select(DODID = ssID,
         starts_with("wasi"),
         sex = Sex,
         age = Age,
         yrEd = Edu,
         audit = AUDIT_SCORE,
         bdi = BDI_SCORE,
         sc.time.raw:hsct.scaled,
         starts_with("wasi"),
         starts_with("wtar")
         ) %>%
  filter(complete.cases(hsct.scaled)) %>%
  filter(DODID != 3231 | DODID != 5481 | DODID != 1370) %>% # DROP THESE SS
  mutate(group = "Control", 
         DODID = as.character(DODID), 
         yrEd = as.numeric(yrEd),
         age = as.numeric(age),
         bdi = as.numeric(bdi),
         audit = as.numeric(audit)
         ) 

# Combines all demo and neuropsych into one df
demo_neuropsych <- bind_rows(hsct_neuropsych, dod_neuropsych)

Now let’s look at some plots to see the participant demographics etc.

# Plots of some important equating measures
# AUDIT
ggplot(demo_neuropsych, aes(audit, fill = group)) +
  geom_histogram(binwidth = 2, color = "black", alpha = 1/3) +
  geom_vline(xintercept = 10, color = "black", linetype = 2) +
  theme_minimal() +
  facet_wrap(~group)

# AGE
ggplot(demo_neuropsych, aes(age, fill = group)) +
  geom_histogram(binwidth = 5, color = "black", alpha = 1/3) +
  theme_minimal() +
  facet_wrap(~group)

# BDI
ggplot(demo_neuropsych, aes(bdi, fill = group)) +
  geom_histogram(binwidth = 5, color = "black", alpha = 1/3) +
  theme_minimal() +
  facet_wrap(~group)

# WASI Full-2 IQ
ggplot(demo_neuropsych, aes(wasi.full2IQ, fill = group)) +
  geom_histogram(binwidth = 5, color = "black", alpha = 1/3) +
  theme_minimal() +
  facet_wrap(~group)

# Sex
ggplot(demo_neuropsych, aes(sex, fill = group)) +
  geom_bar(alpha = 2/3, color = "black") +
  theme_minimal()

Now, let’s drop participants that have:

  • AUDIT scores > 10
  • BDI scores > 29 (We decided not to drop based on BDI)

and summaize all measures separated by group.

A couple of important NAs with emerge:

  • DOD-1879 has no BDI
  • DOD-3484 was only administered 1/2 of the BDI form

The rest of the NAs that did emerge: we went through the paper copies and updated the “dod-neuropsych-data.xlsx” sheet = “mattDatabase_addedHSCT_SS”

# Cleaning up the big df with various measures
demo_neuropsych_clean <- demo_neuropsych %>% 
  filter(audit <= 10) %>% # drops ss with elevated audit scores
  #filter(bdi <= 29) %>% # drops ss with BDI > 29
  gather(meas, value, -DODID, -group) %>% # converts to long format
  mutate(group_cc = ifelse(group == "Control", .5, -.5)) # for linear modeling

demo_clean_desc <- demo_neuropsych_clean %>%
  filter(meas %nin% "sex") %>%
  mutate(value = as.numeric(value)) %>%
  group_by(group, meas) %>%
  summarise(n = n(),
            n.na = sum(is.na(value)),
            m = mean(value, na.rm = TRUE),
            sd = sd(value, na.rm = TRUE)
            ) %>%
  ungroup()

matt_kable(demo_clean_desc %>% arrange(meas))
group meas n n.na m sd
Control a.score 87 0 3.0804598 5.2367627
TBI a.score 105 0 2.2476190 4.2782566
Control ab.score 87 0 4.9310345 6.7939664
TBI ab.score 105 0 4.8000000 7.1982904
Control age 87 0 32.5057471 13.4249625
TBI age 105 0 41.7904762 12.9122000
Control audit 87 0 2.9080460 2.6176156
TBI audit 105 0 2.3428571 2.4527401
Control b.score 87 0 1.8505747 2.8713854
TBI b.score 105 0 2.5523810 3.9685808
Control bdi 87 0 5.8505747 6.2738452
TBI bdi 105 2 18.7087379 11.3557717
Control cat.a.errors 87 0 0.9310345 1.3707332
TBI cat.a.errors 105 0 0.6857143 1.2114382
Control cat.b.errors 87 0 1.5287356 1.5982482
TBI cat.b.errors 105 0 1.7904762 1.9250137
Control errors.scaled 87 0 6.4482759 1.4286001
TBI errors.scaled 105 0 6.4857143 1.6819991
Control hsct.scaled 87 0 6.0919540 1.1972078
TBI hsct.scaled 105 0 5.8761905 1.2610682
Control sc.time.raw 87 0 7.8160920 6.3840804
TBI sc.time.raw 105 0 8.3904762 7.5262301
Control sc.time.scaled 87 0 5.6321839 0.8506488
TBI sc.time.scaled 105 0 5.5904762 0.8049754
Control total.scaled 87 0 18.0574713 2.2222779
TBI total.scaled 105 0 17.7619048 2.4515451
Control uc.time.raw 87 0 20.9770115 18.3543779
TBI uc.time.raw 105 0 31.5619048 27.6773734
Control uc.time.scaled 87 0 6.0459770 0.7762169
TBI uc.time.scaled 105 0 5.6857143 0.9836577
Control wasi.full2IQ 87 0 109.0689655 12.6047177
TBI wasi.full2IQ 105 0 109.7619048 13.7718524
Control wasi.full2subtest 87 0 109.9080460 14.0279059
TBI wasi.full2subtest 105 0 109.7619048 13.7718524
Control wasi.matrix.rscore 87 0 28.2643678 3.6804315
TBI wasi.matrix.rscore 105 0 26.5428571 5.1980554
Control wasi.matrix.tscore 87 0 57.8160920 7.5290580
TBI wasi.matrix.tscore 105 0 57.6476190 8.7078446
Control wasi.similarities.rscore 87 0 37.6781609 3.9369870
TBI wasi.similarities.rscore 105 0 37.1428571 4.1357461
Control wasi.similarities.tscore 87 0 54.6321839 6.9014911
TBI wasi.similarities.tscore 105 0 53.6857143 6.8771001
Control wasi.tverbal 87 0 106.8390805 14.0231317
TBI wasi.tverbal 105 0 105.8000000 13.7003650
Control wasi.viq 87 20 105.0000000 9.3338744
TBI wasi.viq 105 105 NaN NaN
Control wasi.vocab.rscore 87 0 58.5632184 7.8424918
TBI wasi.vocab.rscore 105 0 59.2380952 8.3970276
Control wasi.vocab.tscore 87 0 52.2068966 8.9145008
TBI wasi.vocab.tscore 105 0 52.1142857 8.8048438
Control wtar.rscore 87 20 41.5970149 5.8802693
TBI wtar.rscore 105 0 39.9619048 6.8611557
Control wtar.sscore 87 20 113.2388060 9.6044332
TBI wtar.sscore 105 0 110.5238095 10.4274925
Control yrEd 87 0 15.1954023 1.9098221
TBI yrEd 105 0 15.9619048 2.7523550
# Looking a those with NAs
# yo <- demo_neuropsych_clean %>% filter(is.na(value))

# TBI measures
dod_pre %>%
  filter(dodID %in% unique(demo_neuropsych_clean$DODID)) %>%
  select(dodID, osuWorstInjury) %>%
  count(osuWorstInjury) # The NA's are from DOD controls
# A tibble: 5 x 2
  osuWorstInjury     n
           <dbl> <int>
1              2    18
2              3    51
3              4    12
4              5    24
5             NA    20
# Runs linear model on whole data set
demo_neuropsych_lm <- demo_neuropsych_clean %>%
  filter(meas %nin% c("sex")) %>% # filters out non numeric vars
  group_by(meas) %>%
  do(mod = lm(value ~ 1 + group_cc, data = .)) # t-test on the measures

# Tidying the lms to plot
demo_neuropsych_tidy <- demo_neuropsych_lm %>% 
  tidy(mod) %>%
  mutate(sig = ifelse(p.value < .05, "True", "False"))

# plotting the lms to see what is different between the groups
ggplot(demo_neuropsych_tidy %>% filter(term %nin% "(Intercept)"),
       aes(statistic, reorder(meas, statistic) , color = sig)
       ) +
  geom_point()

# Table of participants included in analyses
matt_kable(demo_neuropsych_clean %>% 
             filter(meas == "age") %>% 
             select(DODID, group)
           )
DODID group
201 Control
301 Control
401 Control
8406 Control
7391 Control
9690 Control
1707 Control
1482 Control
6171 Control
4487 Control
5589 Control
4292 Control
2959 Control
6779 Control
7767 Control
1829 Control
3761 Control
9914 Control
2069 Control
5720 Control
9433 Control
1603 Control
1267 Control
2178 Control
8352 Control
9049 Control
1860 Control
7242 Control
5637 Control
4977 Control
2939 Control
3883 Control
8957 Control
5426 Control
2592 Control
2809 Control
7538 Control
9144 Control
7077 Control
5061 Control
1863 Control
1447 Control
8759 Control
1240 Control
2970 Control
5386 Control
3230 Control
2174 Control
7076 Control
7525 Control
1228 Control
3398 Control
2302 Control
8067 Control
5053 Control
5033 Control
4556 Control
1473 Control
1359 Control
3212 Control
2624 Control
6961 Control
6201 Control
9906 Control
7846 Control
8838 Control
3808 Control
DOD-0001 Control
DOD-0003 Control
DOD-0004 Control
DOD-0005 Control
DOD-0008 Control
DOD-0009 Control
DOD-0011 Control
DOD-0012 Control
DOD-0013 Control
DOD-0015 Control
DOD-0016 Control
DOD-0017 Control
DOD-0018 Control
DOD-0019 Control
DOD-0021 Control
DOD-0022 Control
DOD-0023 Control
DOD-0025 Control
DOD-0027 Control
DOD-0028 Control
DOD-0207 TBI
DOD-0229 TBI
DOD-0583 TBI
DOD-0837 TBI
DOD-0913 TBI
DOD-0919 TBI
DOD-0966 TBI
DOD-1012 TBI
DOD-1022 TBI
DOD-1148 TBI
DOD-1202 TBI
DOD-1240 TBI
DOD-1411 TBI
DOD-1504 TBI
DOD-1764 TBI
DOD-1879 TBI
DOD-2137 TBI
DOD-2165 TBI
DOD-2171 TBI
DOD-2286 TBI
DOD-2329 TBI
DOD-2381 TBI
DOD-2524 TBI
DOD-2551 TBI
DOD-2553 TBI
DOD-2582 TBI
DOD-2619 TBI
DOD-2636 TBI
DOD-2671 TBI
DOD-2761 TBI
DOD-2805 TBI
DOD-3012 TBI
DOD-3034 TBI
DOD-3049 TBI
DOD-3061 TBI
DOD-3186 TBI
DOD-3372 TBI
DOD-3484 TBI
DOD-3787 TBI
DOD-3859 TBI
DOD-3875 TBI
DOD-4157 TBI
DOD-4639 TBI
DOD-4673 TBI
DOD-4727 TBI
DOD-4755 TBI
DOD-4783 TBI
DOD-4936 TBI
DOD-4941 TBI
DOD-5198 TBI
DOD-5335 TBI
DOD-5690 TBI
DOD-5828 TBI
DOD-5851 TBI
DOD-5916 TBI
DOD-5932 TBI
DOD-5977 TBI
DOD-6030 TBI
DOD-6054 TBI
DOD-6075 TBI
DOD-6260 TBI
DOD-6284 TBI
DOD-6315 TBI
DOD-6388 TBI
DOD-6416 TBI
DOD-6599 TBI
DOD-6727 TBI
DOD-6783 TBI
DOD-6785 TBI
DOD-6900 TBI
DOD-7013 TBI
DOD-7024 TBI
DOD-7139 TBI
DOD-7283 TBI
DOD-7292 TBI
DOD-7491 TBI
DOD-7507 TBI
DOD-7969 TBI
DOD-8023 TBI
DOD-8054 TBI
DOD-8064 TBI
DOD-8206 TBI
DOD-8209 TBI
DOD-8292 TBI
DOD-8303 TBI
DOD-8322 TBI
DOD-8432 TBI
DOD-8482 TBI
DOD-8533 TBI
DOD-8566 TBI
DOD-8647 TBI
DOD-8688 TBI
DOD-8854 TBI
DOD-8923 TBI
DOD-9029 TBI
DOD-9076 TBI
DOD-9244 TBI
DOD-9469 TBI
DOD-9569 TBI
DOD-9572 TBI
DOD-9580 TBI
DOD-9698 TBI
DOD-9859 TBI
DOD-9916 TBI
DOD-9958 TBI

Now, taking a look at if we were to look at the groups separated by OSU worst injury score:

1 = “improbable TBI” 2 = “possible TBI” 3 = “mild TBI” 4 = “moderately severe TBI” 5 = “more severe TBI”

osu_simple <- dod_pre %>% select(DODID = dodID, osuWorstInjury)

demo_neuropsych_clean_2 <- demo_neuropsych %>% 
  filter(audit <= 10) %>% # drops ss with elevated audit scores
  #filter(bdi <= 29) %>%
  left_join(., osu_simple, by = "DODID") %>%
  mutate(osuWIS = ifelse(group == "Control" & is.na(osuWorstInjury),
                         "Control",
                         osuWorstInjury)
         ) %>%
  mutate(osuWIS = fct_relevel(osuWIS, c("Control", "2", "3", "4", "5"))) %>%
  gather(meas, value, -DODID, -group, -osuWIS) %>% # converts to long format
  mutate(group_cc = ifelse(group == "Control", .5, -.5), # for linear modeling
         value = as.numeric(value)
         ) 

demo_clean_desc_2 <- demo_neuropsych_clean_2 %>%
  filter(meas %nin% "sex") %>%
  mutate(value = as.numeric(value)) %>%
  group_by(osuWIS, meas) %>%
  summarise(n = n(),
            n.na = sum(is.na(value)),
            m = mean(value, na.rm = TRUE),
            sd = sd(value, na.rm = TRUE),
            sem = sd/sqrt(n)
            ) %>%
  ungroup()


# Plots of some important equating measures
demo_clean_desc_2 %>% filter(meas == "audit")
# A tibble: 5 x 7
  osuWIS  meas      n  n.na     m    sd   sem
  <fct>   <chr> <int> <int> <dbl> <dbl> <dbl>
1 Control audit    87     0  2.91  2.62 0.281
2 2       audit    18     0  2.5   2.31 0.544
3 3       audit    51     0  2.45  2.66 0.373
4 4       audit    12     0  2.58  2.97 0.857
5 5       audit    24     0  1.88  1.83 0.373
# AUDIT
ggplot(demo_clean_desc_2 %>% filter(meas == "audit"), 
       aes(osuWIS, m, fill = osuWIS)
       ) +
  geom_bar(stat = "identity", color = "black") +
  geom_errorbar(aes(ymin = m-sem, ymax = m+sem), width = .1) +
  geom_vline(xintercept = 10, color = "black", linetype = 2) +
  scale_fill_brewer(palette = "Purples") +
  labs(y = "AUDIT", caption = "SEM error bars") +
  theme_minimal()

ggplot(demo_neuropsych_clean_2 %>% filter(meas == "audit"),
       aes(osuWIS, value, fill = osuWIS, group = osuWIS)
       ) +
  geom_boxplot() +
  scale_fill_brewer(palette = "Purples") +
  labs(x = "OSU Worst Injury Score", y = "AUDIT Score") +
  theme_minimal()

# AGE
ggplot(demo_clean_desc_2 %>% filter(meas == "age"), 
       aes(osuWIS, m, fill = osuWIS)
       ) +
  geom_bar(stat = "identity", color = "black") +
  geom_errorbar(aes(ymin = m-sem, ymax = m+sem), width = .1) +
  geom_vline(xintercept = 10, color = "black", linetype = 2) +
  scale_fill_brewer(palette = "Purples") +
  labs(y = "Age", caption = "SEM error bars") +
  theme_minimal()

ggplot(demo_neuropsych_clean_2 %>% filter(meas == "age"),
       aes(osuWIS, value, fill = osuWIS, group = osuWIS)
       ) +
  geom_boxplot() +
  scale_fill_brewer(palette = "Purples") +
  labs(x = "OSU Worst Injury Score", y = "Age (years)") +
  theme_minimal()

# BDI
ggplot(demo_clean_desc_2 %>% filter(meas == "bdi"), 
       aes(osuWIS, m, fill = osuWIS)
       ) +
  geom_bar(stat = "identity", color = "black") +
  geom_errorbar(aes(ymin = m-sem, ymax = m+sem), width = .1) +
  scale_fill_brewer(palette = "Purples") +
  labs(y = "BDI", caption = "SEM error bars") +
  theme_minimal()

ggplot(demo_neuropsych_clean_2 %>% filter(meas == "bdi"),
       aes(osuWIS, value, fill = osuWIS, group = osuWIS)
       ) +
  geom_boxplot() +
  scale_fill_brewer(palette = "Purples") +
  labs(x = "OSU Worst Injury Score", y = "BDI") +
  theme_minimal()

# WASI Full-2 IQ
ggplot(demo_clean_desc_2 %>% filter(meas == "wasi.full2IQ"), 
       aes(osuWIS, m, fill = osuWIS)
       ) +
  geom_bar(stat = "identity", color = "black") +
  geom_errorbar(aes(ymin = m-sem, ymax = m+sem), width = .1) +
  geom_vline(xintercept = 10, color = "black", linetype = 2) +
  scale_fill_brewer(palette = "Purples") +
  labs(y = "WASI Full 2 IQ", caption = "SEM error bars") +
  theme_minimal()

ggplot(demo_neuropsych_clean_2 %>% filter(meas == "wasi.full2IQ"),
       aes(osuWIS, value, fill = osuWIS, group = osuWIS)
       ) +
  geom_boxplot() +
  scale_fill_brewer(palette = "Purples") +
  labs(x = "OSU Worst Injury Score", y = "WASI Full 2 IQ") +
  theme_minimal()

# Years of education
ggplot(demo_clean_desc_2 %>% filter(meas == "yrEd"), 
       aes(osuWIS, m, fill = osuWIS)
       ) +
  geom_bar(stat = "identity", color = "black") +
  geom_errorbar(aes(ymin = m-sem, ymax = m+sem), width = .1) +
  geom_vline(xintercept = 10, color = "black", linetype = 2) +
  scale_fill_brewer(palette = "Purples") +
  labs(y = "Education (years)", caption = "SEM error bars") +
  theme_minimal()

ggplot(demo_neuropsych_clean_2 %>% filter(meas == "yrEd"),
       aes(osuWIS, value, fill = osuWIS, group = osuWIS)
       ) +
  geom_boxplot() +
  scale_fill_brewer(palette = "Purples") +
  labs(x = "OSU Worst Injury Score", y = "Education (years)") +
  theme_minimal()

LSA

# Creates a data frame of the hayling sentences 
hsct_sents <- tibble(section = rep(c("SC", "UC"), each = 15),
                     num = rep(1:15, 2),
                     sent = c("He mailed a letter without a",
                              "In the first blank enter your",
                              "The old house will be torn",
                              "It's hard to admit when one is",
                              "The job was easy most of the",
                              "When you go to bed turn off the",
                              "The game was stopped when it started to",
                              "He scraped the cold food from his",
                              "The dispute was settled by a third",
                              "Three people were killed in an interstate",
                              "The baby cried and upset her",
                              "George could not believe that his son had stolen a",
                              "He crept into the room without a",
                              "Billy hit his sister on the",
                              "Too many men are out of",
                              "The captain wanted to stay with the sinking",
                              "They went as far as they",
                              "Most cats see very well at",
                              "Jean was glad the affair was",
                              "The whole town came to hear the mayor",
                              "Most sharks attack very close to",
                              "None of the books made any",
                              "The dough was put in the hot",
                              "She called the husband at his",
                              "All the guests had a very good",
                              "He bought them in the candy",
                              "His leaving home amazed all his",
                              "At last the time for action had",
                              "The dog chased our cat up the",
                              "At night they often took a short")
                     )

# This calculates the LSA values

# CONTROLS FROM DOD
dod_hsct_ctl_calc <- dod_hayling_ctl_trim %>% 
  gather(section, resp, -DODID) %>%
  separate(section, sep = 2, into = c("section", "num")) %>%
  mutate(num = as.numeric(num)) %>%
  inner_join(., hsct_sents, by = c("section", "num")) %>%
  group_by(DODID, section, num, resp, sent) %>%
  do(lsa = costring(.$sent, .$resp, EN_100k, breakdown = TRUE)) %>%
  mutate(lsa = unlist(lsa), group = "Control") %>% 
  ungroup()

# TBI FROM DOD
dod_hsct_tbi_calc <- dod_hayling_tbi_trim %>% 
  gather(section, resp, -DODID) %>%
  separate(section, sep = 2, into = c("section", "num")) %>%
  mutate(num = as.numeric(num)) %>%
  inner_join(., hsct_sents, by = c("section", "num")) %>%
  group_by(DODID, section, num, resp, sent) %>%
  do(lsa = costring(.$sent, .$resp, EN_100k, breakdown = TRUE)) %>%
  mutate(lsa = unlist(lsa), group = "TBI") %>% 
  ungroup()

# Controls from HSCT
ctl_hsct_calc <- control_hsct_data %>%
  filter(complete.cases(hsct.scaled)) %>%
  select(ssID, contains("word")) %>%
  gather(section, resp, -ssID) %>%
  separate(section, into = c("section", "drop_this", "num")) %>%
  mutate(num = as.numeric(num), 
         drop_this = NULL,
         section = toupper(section)
         ) %>%
  inner_join(., hsct_sents, by = c("section", "num")) %>%
  group_by(ssID, section, num, resp, sent) %>%
  do(lsa = costring(.$sent, .$resp, EN_100k, breakdown = TRUE)) %>%
  mutate(lsa = unlist(lsa), 
         group = "Control", 
         DODID = as.character(ssID), 
         ssID = NULL) %>%
  ungroup()
  
# Combines TBI+CONTROLS from DOD and HSCT into one df
lsa_data <- bind_rows(dod_hsct_ctl_calc, 
                      dod_hsct_tbi_calc,
                      ctl_hsct_calc
                      ) %>%
  filter(DODID %in% unique(demo_neuropsych_clean_2$DODID)) # drops ss from earlier

# For Marielle ---
all_lsa <- bind_rows(dod_hsct_ctl_calc, dod_hsct_tbi_calc, ctl_hsct_calc) %>%
  group_by(DODID, group, section) %>%
  summarise(n = n(), n.na = sum(is.na(lsa)), mLSA = mean(lsa, na.rm = T)) %>%
  ungroup()
write_csv(all_lsa, path = "all-lsa-values.csv")
# ---
  
# SUBJECT-WISE summary
lsa_data_ss <- lsa_data %>%
  group_by(DODID, group, section) %>%
  summarise(n = n(), n.na = sum(is.na(lsa)), mLSA = mean(lsa, na.rm = T)) %>%
  ungroup() %>%
  left_join(., osu_simple, by = "DODID") %>%
  mutate(osuWIS = ifelse(group == "Control" & is.na(osuWorstInjury),
                         "Control",
                         osuWorstInjury),
         osuGroups = case_when(
           osuWIS == "Control" ~ "Control",
           osuWIS == "2" ~ "Mild",
           osuWIS == "3" ~ "Mild",
           osuWIS == "4" ~ "Mod/Severe",
           osuWIS == "5" ~ "Mod/Severe"
           )
         ) %>%
  mutate(osuWIS = fct_relevel(osuWIS, c("Control", "2", "3", "4", "5")),
         osuGroups = fct_relevel(osuGroups, c("Control", "Mild", "Mod/Severe"))
         )

# GROUP-WISE summary
lsa_data_sum <- lsa_data_ss %>% 
  group_by(group, section) %>% 
  summarise(N = n(), m = mean(mLSA), sd = sd(mLSA), sem = sd/sqrt(N)) %>%
  ungroup()

lsa_osu_sum <- lsa_data_ss %>%
  group_by(osuWIS, section) %>%
  summarise(N = n(), m = mean(mLSA), sd = sd(mLSA), sem = sd/sqrt(N)) %>%
  ungroup()

lsa_groups_sum <- lsa_data_ss %>%
  group_by(osuGroups, section) %>%
  summarise(N = n(), m = mean(mLSA), sd = sd(mLSA), sem = sd/sqrt(N)) %>%
  ungroup()
  
ggplot(lsa_data_ss, aes(section, mLSA, group = DODID, color = group)) +
  geom_point() +
  geom_path(alpha = 2/3)

# Plot using OSU scores
pd <- position_dodge(width = .1)
pj <- position_jitter(width = .1)
ggplot(lsa_osu_sum, aes(section, m, group = osuWIS, color = osuWIS)) +
  geom_point(data = lsa_data_ss, aes(y = mLSA), 
             shape = 1, alpha = 2/3, position = pj, size = 2
             ) +
  geom_point(position = pd, size = 2) +
  geom_errorbar(aes(ymin = m-sem, ymax = m+sem), 
                width = .05, position = pd
                ) +
  geom_path(position = pd) +
  scale_color_brewer(palette = "Set1") +
  coord_cartesian(ylim = c(.4, .6)) +
  theme_minimal()

pd <- position_dodge(width = .1)
pj <- position_jitter(width = .1)
ggplot(lsa_groups_sum, aes(section, m, group = osuGroups, color = osuGroups)) +
  geom_point(data = lsa_data_ss, aes(y = mLSA), 
             shape = 1, alpha = 2/3, position = pj, size = 2
             ) +
  geom_point(position = pd, size = 2) +
  geom_errorbar(aes(ymin = m-sem, ymax = m+sem), 
                width = .05, position = pd
                ) +
  geom_path(position = pd) +
  scale_color_brewer(palette = "Set1") +
  #coord_cartesian(ylim = c(.4, .6)) +
  theme_minimal()

# THE plot
# Means + scatter
pd <- position_dodge(width = .1)
pj <- position_jitter(width = .1)
ggplot(lsa_data_sum, aes(section, m, group = group, color = group)) +
  geom_point(data = lsa_data_ss, aes(y = mLSA), 
             shape = 1, alpha = 2/3, position = pj, size = 2
             ) +
  geom_point(position = pd, size = 2) +
  geom_errorbar(aes(ymin = m-sem, ymax = m+sem), 
                width = .05, position = pd
                ) +
  geom_line(position = pd, alpha = 2/3) +
  labs(x = "Hayling Section", y = "Mean LSA Estimate", caption = "SEM error bars") +
  scale_color_brewer(palette = "Dark2") +
  theme_minimal() +
  theme(text = element_text(size = 15))

# Means
ggplot(lsa_data_sum, aes(section, m, group = group, color = group)) +
  # geom_point(data = lsa_data_ss, aes(y = mLSA), 
  #            shape = 1, alpha = 2/3, position = pj, size = 2
  #            ) +
  geom_point(position = pd, size = 2) +
  geom_errorbar(aes(ymin = m-sem, ymax = m+sem), 
                width = .05, position = pd
                ) +
  geom_line(position = pd, alpha = 2/3) +
  labs(x = "Hayling Section", y = "Mean LSA Estimate", caption = "SEM error bars") +
  coord_cartesian(ylim = c(.3, .6)) +
  scale_color_brewer(palette = "Dark2") +
  theme_minimal() +
  theme(text = element_text(size = 15))

# ggsave(filename = "lsa-plot-means.svg", width = 5, height = 4, units = "in", path = "viz/")
# ggsave(filename = "lsa-plot-means.png", width = 5, height = 4, units = "in", path = "viz/")

# Scatter
pd <- position_dodge(width = .1)
pj <- position_jitter(width = .1)
ggplot(lsa_data_sum, aes(section, m, group = group, color = group)) +
  geom_point(data = lsa_data_ss, aes(y = mLSA),
             shape = 1, alpha = 2/3, position = pj, size = 2
             ) +
  # geom_point(position = pd, size = 2) +
  # geom_errorbar(aes(ymin = m-sem, ymax = m+sem), 
  #               width = .05, position = pd
  #               ) +
  # geom_line(position = pd, alpha = 2/3) +
  labs(x = "Hayling Section", y = "Mean LSA Estimate", caption = "SEM error bars") +
  scale_color_brewer(palette = "Dark2") +
  coord_cartesian(ylim = c(.3, .6)) +
  theme_minimal() +
  theme(text = element_text(size = 15))

# ggsave(filename = "lsa-plot-scatter.svg", width = 5, height = 4, units = "in", path = "viz/")
# ggsave(filename = "lsa-plot-scatter.png", width = 5, height = 4, units = "in", path = "viz/")

# # Stats
# ezANOVA(data = lsa_data_ss,
#         dv = mLSA,
#         wid = DODID,
#         within = section,
#         between = group,
#         detailed = T)
# 
# ezANOVA(data = lsa_data_ss,
#         dv = mLSA,
#         wid = DODID,
#         within = section,
#         between = osuGroups,
#         detailed = T)

Modeling LSA

Modeling the LSA values with the Model Comparison Approach.

Orthogonal contrast codes will be defined as such:

  • Controls
  • Possible TBIs = OSU Worst Injury Score == 2
  • Mild TBIs = OSU Worst Injury Score == 3
  • Moderate-Severe TBI = OSU Worst Injury Score == 4 or 5
ccodes <- tibble(osuWIS = c("Control", "2", "3", "4/5"),
                 c1 = c(3, -1, -1, -1), # Control vs. TBI
                 c2 = c(0, -2, 1, 1),   # Possible vs. Mild/Mod/Sever
                 c3 = c(0, 0, -1, 1)    # Mild vs. Mod/Severe
                 )

matt_kable(ccodes)
osuWIS c1 c2 c3
Control 3 0 0
2 -1 -2 0
3 -1 1 -1
4/5 -1 1 1
# Makes it easier to join with long data frame
ccodes_merge <- tibble(osuWIS = c("Control", "2", "3", "4", "5"),
                       c1 = c(3, -1, -1, -1, -1), # Control vs. TBI
                       c2 = c(0, -2, 1, 1, 1),   # Possible vs. Mild/Mod/Sever
                       c3 = c(0, 0, -1, 1, 1)    # Mild vs. Mod/Severe
                       )

# Introduces contrast codes to the df
lsa_mod_data <- lsa_data_ss %>% 
  mutate(secc = ifelse(section == "SC", 1, -1)) %>% # dependency
  group_by(DODID, osuWIS) %>%
  # introduces dependency of HSCT section
  summarise(w0 = sum(mLSA*1)/sqrt(2),
            w1 = sum(mLSA*secc)/sqrt(sum(secc^2))
            ) %>%
  ungroup() %>%
  left_join(., ccodes_merge, by = "osuWIS") # between-subjects ccodes

# Models these data (see page 286 in MCA book)
lsa_mod <- lsa_mod_data %>%
  do(mod_w0 = lm(w0 ~ 1 + c1 + c2 + c3, data = .), 
     mod_w1 = lm(w1 ~ 1 + c1 + c2 + c3, data = .)
     )

# Prints their results to screen
summary(lsa_mod$mod_w0[[1]]) # Between-subjects 

Call:
lm(formula = w0 ~ 1 + c1 + c2 + c3, data = .)

Residuals:
     Min       1Q   Median       3Q      Max 
-0.09593 -0.02191  0.00031  0.02249  0.12318 

Coefficients:
              Estimate Std. Error t value Pr(>|t|)    
(Intercept)  0.7082040  0.0029006 244.162   <2e-16 ***
c1          -0.0005600  0.0012984  -0.431    0.667    
c2           0.0003982  0.0029681   0.134    0.893    
c3          -0.0004386  0.0037330  -0.117    0.907    
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Residual standard error: 0.0343 on 188 degrees of freedom
Multiple R-squared:  0.00152,   Adjusted R-squared:  -0.01441 
F-statistic: 0.09542 on 3 and 188 DF,  p-value: 0.9625
summary(lsa_mod$mod_w1[[1]]) # HSCT section

Call:
lm(formula = w1 ~ 1 + c1 + c2 + c3, data = .)

Residuals:
      Min        1Q    Median        3Q       Max 
-0.091384 -0.024933  0.005186  0.022716  0.074072 

Coefficients:
              Estimate Std. Error t value Pr(>|t|)    
(Intercept)  7.113e-02  2.809e-03  25.322   <2e-16 ***
c1           1.839e-03  1.257e-03   1.462    0.145    
c2          -5.845e-04  2.874e-03  -0.203    0.839    
c3           8.956e-05  3.615e-03   0.025    0.980    
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Residual standard error: 0.03321 on 188 degrees of freedom
Multiple R-squared:  0.01348,   Adjusted R-squared:  -0.002265 
F-statistic: 0.8561 on 3 and 188 DF,  p-value: 0.465
# Plots
lsa_mod_data_2 <- lsa_data_ss %>% left_join(., ccodes_merge, by = "osuWIS")

c1_sum <- lsa_mod_data_2 %>% 
  group_by(section, c1) %>% 
  summarise(n = n(), m = mean(mLSA), sd = sd(mLSA), sem = sd/sqrt(n))

c2_sum <- lsa_mod_data_2 %>% 
  group_by(section, c2) %>% 
  summarise(n = n(), m = mean(mLSA), sd = sd(mLSA), sem = sd/sqrt(n)) %>%
  filter(c2 != 0)

c3_sum <- lsa_mod_data_2 %>% 
  group_by(section, c3) %>% 
  summarise(n = n(), m = mean(mLSA), sd = sd(mLSA), sem = sd/sqrt(n)) %>%
  filter(c3 != 0)

ggplot(c1_sum, aes(section, m, group = factor(c1), color = factor(c1))) +
  geom_point(data = lsa_mod_data_2, 
             aes(x = section, mLSA), 
             shape = 1, 
             position = pj
             ) +
  geom_point(position = pd) +
  geom_errorbar(aes(ymin = m-sem, ymax = m+sem), width = .05, position = pd) +
  geom_line(position = pd) +
  scale_color_manual(values = c(pu_pal[6], pu_pal[9]), labels = c("TBI", "Controls")) +
  theme_minimal()

ggplot(c2_sum, aes(section, m, group = factor(c2), color = factor(c2))) +
  geom_point(data = lsa_mod_data_2 %>% filter(c2 != 0), 
             aes(x = section, mLSA), 
             shape = 1, 
             position = pj
             ) +
  geom_point(position = pd) +
  geom_errorbar(aes(ymin = m-sem, ymax = m+sem), width = .05, position = pd) +
  geom_line(position = pd) +
  scale_color_manual(values = c(pu_pal[7], pu_pal[9]), 
                     labels = c("Possible", "Mild/Mod/Severe")
                     ) +
  theme_minimal()

ggplot(c3_sum, aes(section, m, group = factor(c3), color = factor(c3))) +
  geom_point(data = lsa_mod_data_2 %>% filter(c3 != 0), 
             aes(x = section, mLSA), 
             shape = 1, 
             position = pj
             ) +
  geom_point(position = pd) +
  geom_errorbar(aes(ymin = m-sem, ymax = m+sem), width = .05, position = pd) +
  geom_line(position = pd) +
  scale_color_manual(values = c(pu_pal[8], pu_pal[9]), 
                     labels = c("Mild", "Mod/Severe")
                     ) +
  theme_minimal()

LSA with age

ages <- demo_neuropsych %>% select(DODID, age, wasi.full2IQ)

lsa_mod_data_age <- lsa_data_ss %>% 
  mutate(secc = ifelse(section == "SC", 1, -1)) %>% # dependency
  group_by(DODID, osuWIS) %>%
  # introduces dependency of HSCT section
  summarise(w0 = mean(mLSA), # between-ss effect
            w1 = sum(mLSA*secc) # within-ss effect of HSCT section 
            ) %>%
  ungroup() %>%
  left_join(., ccodes_merge, by = "osuWIS") %>% # between-subjects ccodes
  left_join(., ages, by = "DODID") %>%
  mutate(age_c = scale(age, center = TRUE, scale = FALSE), # mean centers age
         iq_c = scale(wasi.full2IQ, center = TRUE, scale = FALSE),
         osuWIS = factor(osuWIS, 
                         levels = c("Control", "2", "3", "4", "5"),
                         labels = c("Control","Possible", "Mild", "Mod/Severe", "Mod/Severe") # converts to factor
                        )
         )

# Contrasts ---
ccodes_mat <- as.matrix(ccodes[,-1]) # reduces contrasts to matrix

# imports these contrasts to factor
contrasts(lsa_mod_data_age$osuWIS) <- ccodes_mat

# Age Models ----
summary(lm(w0 ~ 1 + osuWIS*age_c, data = lsa_mod_data_age))

Call:
lm(formula = w0 ~ 1 + osuWIS * age_c, data = lsa_mod_data_age)

Residuals:
      Min        1Q    Median        3Q       Max 
-0.069063 -0.016838 -0.000057  0.015047  0.086912 

Coefficients:
                 Estimate Std. Error t value Pr(>|t|)    
(Intercept)     5.007e-01  2.144e-03 233.561   <2e-16 ***
osuWISc1       -9.822e-04  9.681e-04  -1.015    0.312    
osuWISc2        7.674e-04  2.185e-03   0.351    0.726    
osuWISc3        1.648e-04  2.762e-03   0.060    0.952    
age_c          -1.803e-04  1.610e-04  -1.119    0.264    
osuWISc1:age_c -6.340e-05  7.052e-05  -0.899    0.370    
osuWISc2:age_c -1.272e-04  1.678e-04  -0.758    0.449    
osuWISc3:age_c -1.445e-04  2.032e-04  -0.711    0.478    
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Residual standard error: 0.02416 on 184 degrees of freedom
Multiple R-squared:  0.0302,    Adjusted R-squared:  -0.006698 
F-statistic: 0.8185 on 7 and 184 DF,  p-value: 0.573
summary(lm(w1 ~ 1 + osuWIS*age_c, data = lsa_mod_data_age))

Call:
lm(formula = w1 ~ 1 + osuWIS * age_c, data = lsa_mod_data_age)

Residuals:
      Min        1Q    Median        3Q       Max 
-0.129017 -0.034815  0.006564  0.030428  0.107326 

Coefficients:
                 Estimate Std. Error t value Pr(>|t|)    
(Intercept)     9.997e-02  4.185e-03  23.888   <2e-16 ***
osuWISc1        3.541e-03  1.890e-03   1.873   0.0626 .  
osuWISc2       -6.438e-04  4.265e-03  -0.151   0.8802    
osuWISc3       -2.543e-04  5.393e-03  -0.047   0.9624    
age_c           4.103e-04  3.144e-04   1.305   0.1936    
osuWISc1:age_c  7.904e-06  1.377e-04   0.057   0.9543    
osuWISc2:age_c -5.898e-05  3.276e-04  -0.180   0.8573    
osuWISc3:age_c  1.352e-04  3.967e-04   0.341   0.7336    
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Residual standard error: 0.04717 on 184 degrees of freedom
Multiple R-squared:  0.02636,   Adjusted R-squared:  -0.01068 
F-statistic: 0.7117 on 7 and 184 DF,  p-value: 0.6622
# Visualizing age models
ggplot(lsa_mod_data_age, aes(age_c, w0, group = osuWIS, color = osuWIS)) +
  geom_point() +
  geom_smooth(method = "lm", se = FALSE) +
  labs(x = "Age (centered)", y = "Average LSA Rating (w0)") +
  theme_minimal()

ggplot(lsa_mod_data_age, aes(age_c, w0)) +
  geom_point() +
  geom_smooth(method = "lm", se = TRUE) +
  labs(x = "Age (centered)", y = "Average LSA Rating (w0)") +
  theme_minimal()

ggplot(lsa_mod_data_age, aes(age_c, w1, group = osuWIS, color = osuWIS)) +
  geom_point() +
  geom_smooth(method = "lm", se = FALSE) +
  labs(x = "Age (centered", y = "Within-ss LSA effect (w1)") +
  theme_minimal()

w1_sum <- lsa_mod_data_age %>% 
  group_by(c1) %>%
  summarise(m = mean(w1), sd = sd(w1), n = n(), sem = sd/sqrt(n)) %>%
  ungroup()
  
ggplot(w1_sum, 
       aes(factor(c1), m, group = factor(c1), color = factor(c1))
       ) +
  geom_point(data = lsa_mod_data_age, aes(y = w1), shape = 1, position = pj) +
  geom_point() +
  geom_errorbar(aes(ymin = m-sem, ymax = m+sem)) +
  labs(x = "Group", 
       y = "Within-ss LSA effect (w1)", 
       title = "LSA Age Model Trending Effect"
       ) +
  theme_minimal()

# IQ Models ----
summary(lm(w0 ~ 1 + osuWIS*iq_c, data = lsa_mod_data_age))

Call:
lm(formula = w0 ~ 1 + osuWIS * iq_c, data = lsa_mod_data_age)

Residuals:
      Min        1Q    Median        3Q       Max 
-0.061796 -0.015132  0.000061  0.014472  0.059209 

Coefficients:
                Estimate Std. Error t value Pr(>|t|)    
(Intercept)    5.006e-01  1.983e-03 252.477  < 2e-16 ***
osuWISc1      -4.352e-04  8.830e-04  -0.493  0.62272    
osuWISc2       6.674e-04  2.019e-03   0.331  0.74129    
osuWISc3      -1.293e-03  2.586e-03  -0.500  0.61777    
iq_c          -5.828e-04  1.518e-04  -3.839  0.00017 ***
osuWISc1:iq_c -5.701e-05  6.886e-05  -0.828  0.40880    
osuWISc2:iq_c  2.122e-04  1.592e-04   1.333  0.18418    
osuWISc3:iq_c  7.860e-05  1.838e-04   0.428  0.66946    
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Residual standard error: 0.02316 on 184 degrees of freedom
Multiple R-squared:  0.1085,    Adjusted R-squared:  0.07459 
F-statistic: 3.199 on 7 and 184 DF,  p-value: 0.0032
summary(lm(w1 ~ 1 + osuWIS*iq_c, data = lsa_mod_data_age))

Call:
lm(formula = w1 ~ 1 + osuWIS * iq_c, data = lsa_mod_data_age)

Residuals:
      Min        1Q    Median        3Q       Max 
-0.107168 -0.033706  0.005113  0.028159  0.099507 

Coefficients:
                Estimate Std. Error t value Pr(>|t|)    
(Intercept)    1.012e-01  3.880e-03  26.084  < 2e-16 ***
osuWISc1       2.579e-03  1.728e-03   1.493  0.13726    
osuWISc2      -1.203e-03  3.949e-03  -0.305  0.76093    
osuWISc3       1.454e-03  5.060e-03   0.287  0.77412    
iq_c           9.337e-04  2.970e-04   3.144  0.00195 ** 
osuWISc1:iq_c  1.642e-04  1.347e-04   1.219  0.22447    
osuWISc2:iq_c -3.435e-04  3.114e-04  -1.103  0.27154    
osuWISc3:iq_c  8.512e-05  3.597e-04   0.237  0.81319    
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Residual standard error: 0.04532 on 184 degrees of freedom
Multiple R-squared:  0.1011,    Adjusted R-squared:  0.06689 
F-statistic: 2.956 on 7 and 184 DF,  p-value: 0.005853
# Visualizing IQ models
ggplot(lsa_mod_data_age, aes(iq_c, w0, group = osuWIS, color = osuWIS)) +
  geom_point() +
  geom_smooth(method = "lm", se = FALSE) +
  labs(x = "WASI Full 2 IQ (centered", y = "Average LSA Rating (w0)") +
  theme_minimal()

ggplot(lsa_mod_data_age, aes(iq_c, w0)) +
  geom_point() +
  geom_smooth(method = "lm", se = TRUE) +
  labs(x = "WASI Full 2 IQ (centered", y = "Average LSA Rating (w0)") +
  theme_minimal()

ggplot(lsa_mod_data_age, aes(iq_c, w1, group = osuWIS, color = osuWIS)) +
  geom_point() +
  geom_smooth(method = "lm", se = FALSE) +
  labs(x = "WASI Full 2 IQ (centered", y = "Within-ss LSA effect (w1)") +
  theme_minimal()

ggplot(lsa_mod_data_age, aes(iq_c, w1)) +
  geom_point() +
  geom_smooth(method = "lm", se = TRUE) +
  labs(x = "WASI Full 2 IQ (centered", y = "Within-ss LSA effect (w1)") +
  theme_minimal()

Word Frequency

# Calculating WF measures

# DOD Controls

# First let's get the max num of multi-word responses
dod_hsct_ctl_calc_numwords <- dod_hsct_ctl_calc %>%
  rowwise() %>%
  mutate(num_words = wordcount(resp)) # wordcount() is from ngram

# The max number of multi-word resp
max(dod_hsct_ctl_calc_numwords$num_words) # answer is 2
[1] 2
# Calculates WF measures for each word (even if multi-word resp)
dod_hsct_ctl_subtlus <- dod_hsct_ctl_calc %>% 
  separate(resp, into = c("resp1", "resp2"), sep = " ") %>% # uses above calc
  select(DODID:resp2) %>%
  gather(resp, Word, -DODID, -section, -num) %>%
  filter(complete.cases(Word)) %>% # removes the NAs for single word responses
  left_join(., subtlus)

# Takes the average for multi-word resp (if there are some)
dod_hsct_ctl_subtlus_1 <- dod_hsct_ctl_subtlus %>%
  group_by(DODID, section, num) %>%
  summarise(n = n(),
            n.na = sum(is.na(SUBTLWF)),
            wf = mean(SUBTLWF, na.rm = FALSE), # FALSE ensures that the resp
            cd = mean(SUBTLCD, na.rm = FALSE)  # wont be counted in average
            ) %>%
  ungroup()

dod_hsct_ctl_subtlus_ss <- dod_hsct_ctl_subtlus_1 %>%
  group_by(DODID, section) %>%
  summarise(N = n(), 
            N.na = sum(is.na(wf)),
            m_wf = mean(wf, na.rm = TRUE), 
            m_cd = mean(cd, na.rm = TRUE)
            ) %>%
  mutate(group = "Control") %>%
  ungroup()

# DOD TBIs

# First let's get the max num of multi-word responses
dod_hsct_tbi_calc_numwords <- dod_hsct_tbi_calc %>%
  rowwise() %>%
  mutate(num_words = wordcount(resp)) # wordcount() is from ngram

# The max number of multi-word resp
max(dod_hsct_tbi_calc_numwords$num_words) # answer is 4
[1] 4
# Calculates WF measures for each word (even if multi-word resp)
dod_hsct_tbi_subtlus <- dod_hsct_tbi_calc %>% 
  separate(resp, into = c("resp1", "resp2", "resp3", "resp4"), 
           sep = " "
           ) %>% # uses above calc
  select(DODID:resp4) %>%
  gather(resp, Word, -DODID, -section, -num) %>%
  filter(complete.cases(Word)) %>% # removes the NAs for single word responses
  left_join(., subtlus)

# Takes the average for multi-word resp (if there are some)
dod_hsct_tbi_subtlus_1 <- dod_hsct_tbi_subtlus %>%
  group_by(DODID, section, num) %>%
  summarise(n = n(),
            n.na = sum(is.na(SUBTLWF)),
            wf = mean(SUBTLWF, na.rm = FALSE), # FALSE ensures that the resp
            cd = mean(SUBTLCD, na.rm = FALSE)  # wont be counted in average
            ) %>%
  ungroup()

dod_hsct_tbi_subtlus_ss <- dod_hsct_tbi_subtlus_1 %>%
  group_by(DODID, section) %>%
  summarise(N = n(), 
            N.na = sum(is.na(wf)),
            m_wf = mean(wf, na.rm = TRUE), 
            m_cd = mean(cd, na.rm = TRUE)
            ) %>%
  mutate(group = "TBI") %>%
  ungroup()

# HSCT Controls 
# First let's get the max num of multi-word responses
ctl_hsct_calc_numwords <- ctl_hsct_calc %>%
  rowwise() %>%
  mutate(num_words = wordcount(resp)) # wordcount() is from ngram

# The max number of multi-word resp
max(ctl_hsct_calc_numwords$num_words) # answer is 2
[1] 2
# Calculates WF measures for each word (even if multi-word resp)
ctl_hsct_calc_subtlus <- ctl_hsct_calc %>%
  separate(resp, into = c("resp1", "resp2"), 
           sep = " "
           ) %>% # uses above calc
  select(DODID, section, num, resp1, resp2) %>%
  gather(resp, Word, -DODID, -section, -num) %>%
  filter(complete.cases(Word)) %>% # removes the NAs for single word responses
  left_join(., subtlus)

# Takes the average for multi-word resp (if there are some)
ctl_hsct_calc_subtlus_1 <- ctl_hsct_calc_subtlus %>%
  group_by(DODID, section, num) %>%
  summarise(n = n(),
            n.na = sum(is.na(SUBTLWF)),
            wf = mean(SUBTLWF, na.rm = FALSE), # FALSE ensures that the resp
            cd = mean(SUBTLCD, na.rm = FALSE)  # wont be counted in average
            ) %>%
  ungroup()

ctl_hsct_calc_subtlus_ss <- ctl_hsct_calc_subtlus_1 %>%
  group_by(DODID, section) %>%
  summarise(N = n(), 
            N.na = sum(is.na(wf)),
            m_wf = mean(wf, na.rm = TRUE), 
            m_cd = mean(cd, na.rm = TRUE)
            ) %>%
  ungroup() %>%
  mutate(DODID = as.character(DODID),
         group = "Control")

# Next, combine into one big df
wf_data_ss <- bind_rows(ctl_hsct_calc_subtlus_ss,
                        dod_hsct_tbi_subtlus_ss,
                        dod_hsct_ctl_subtlus_ss
                        ) %>%
  filter(DODID %in% unique(demo_neuropsych_clean_2$DODID)) %>% # drops ss from earlier
  left_join(., osu_simple, by = "DODID") %>%
  mutate(osuWIS = ifelse(group == "Control" & is.na(osuWorstInjury),
                         "Control",
                         osuWorstInjury),
         osuGroups = case_when(
           osuWIS == "Control" ~ "Control",
           osuWIS == "2" ~ "Mild",
           osuWIS == "3" ~ "Mild",
           osuWIS == "4" ~ "Mod/Severe",
           osuWIS == "5" ~ "Mod/Severe"
           )
         ) %>%
  mutate(osuWIS = fct_relevel(osuWIS, c("Control", "2", "3", "4", "5")),
         osuGroups = fct_relevel(osuGroups, c("Control", "Mild", "Mod/Severe"))
         )

wf_data_sum <- wf_data_ss %>%
  group_by(group, section) %>%
  summarise(n = n(), 
            n.na = sum(N.na), 
            wf = mean(m_wf),
            sd_wf = sd(m_wf),
            sem_wf = sd_wf/sqrt(n),
            cd = mean(m_cd),
            sd_cd = sd(m_cd),
            sem_cd = sd_cd/sqrt(n)
            ) %>%
  ungroup()

wf_osu_sum <- wf_data_ss %>%
  group_by(osuWIS, section) %>%
  summarise(n = n(), 
            n.na = sum(N.na), 
            wf = mean(m_wf),
            sd_wf = sd(m_wf),
            sem_wf = sd_wf/sqrt(n),
            cd = mean(m_cd),
            sd_cd = sd(m_cd),
            sem_cd = sd_cd/sqrt(n)
            ) %>%
  ungroup()



# plot
pd <- position_dodge(width = .1)
ggplot(wf_data_sum, aes(section, wf, group = group, color = group)) +
  geom_point(data = wf_data_ss, aes(y = m_wf), 
             shape = 1, alpha = 2/3, position = "jitter"
             ) +
  geom_point(position = pd) +
  geom_errorbar(aes(ymin = wf-sem_wf, ymax = wf+sem_wf), 
                width = .05, 
                position = pd
                ) +
  geom_line(position = pd, alpha = 2/3) +
  labs(x = "Hayling Section", y = "Mean WF Estimate", caption = "SEM error bars") +
  scale_color_brewer(palette = "Dark2") +
  theme_minimal()

pd <- position_dodge(width = .1)
ggplot(wf_osu_sum, aes(section, wf, group = osuWIS, color = osuWIS)) +
  geom_point(data = wf_data_ss, aes(y = m_wf), 
             shape = 1, alpha = 2/3, position = "jitter"
             ) +
  geom_point(position = pd) +
  geom_errorbar(aes(ymin = wf-sem_wf, ymax = wf+sem_wf), 
                width = .05, 
                position = pd
                ) +
  geom_line(position = pd, alpha = 2/3) +
  labs(x = "Hayling Section", y = "Mean WF Estimate", caption = "SEM error bars") +
  scale_color_brewer(palette = "Set1") +
  theme_minimal()

# ggsave(filename = "wf-plot.svg", width = 5, height = 4, units = "in", path = "viz/")
# ggsave(filename = "wf-plot.png", width = 5, height = 4, units = "in", path = "viz/")


ezANOVA(data = wf_data_ss,
        dv = m_wf,
        wid = DODID,
        within = section,
        between = group,
        detailed = T
        )
$ANOVA
         Effect DFn DFd          SSn     SSd            F            p
1   (Intercept)   1 190 35245877.413 5343345 1253.2816859 1.366276e-85
2         group   1 190    14610.480 5343345    0.5195231 4.719311e-01
3       section   1 190  7676397.875 3406262  428.1865118 1.465461e-50
4 group:section   1 190     4650.237 3406262    0.2593884 6.111316e-01
  p<.05          ges
1     * 0.8011248750
2       0.0016670604
3     * 0.4673319936
4       0.0005311972
pd <- position_dodge(width = .1)
ggplot(wf_data_sum, aes(section, cd, group = group, color = group)) +
  geom_point(data = wf_data_ss, aes(y = m_cd), 
             shape = 1, alpha = 2/3, position = "jitter"
             ) +
  geom_point(position = pd) +
  geom_errorbar(aes(ymin = cd-sem_cd, ymax = cd+sem_cd), 
                width = .05, 
                position = pd
                ) +
  geom_line(position = pd, alpha = 2/3) +
  labs(x = "Hayling Section", y = "Mean SV Estimate", caption = "SEM error bars") +
  scale_color_brewer(palette = "Dark2") +
  theme_minimal()

pd <- position_dodge(width = .1)
ggplot(wf_osu_sum, aes(section, cd, group = osuWIS, color = osuWIS)) +
  geom_point(data = wf_data_ss, aes(y = m_cd), 
             shape = 1, alpha = 2/3, position = "jitter"
             ) +
  geom_point(position = pd) +
  geom_errorbar(aes(ymin = cd-sem_cd, ymax = cd+sem_cd), 
                width = .05, 
                position = pd
                ) +
  geom_line(position = pd, alpha = 2/3) +
  labs(x = "Hayling Section", y = "Mean SV Estimate", caption = "SEM error bars") +
  scale_color_brewer(palette = "Set1") +
  theme_minimal()

# ggsave(filename = "sv-plot.svg", width = 5, height = 4, units = "in", path = "viz/")
# ggsave(filename = "sv-plot.png", width = 5, height = 4, units = "in", path = "viz/")

ezANOVA(data = wf_data_ss,
        dv = m_cd,
        wid = DODID,
        within = section,
        between = group,
        detailed = T
        )
$ANOVA
         Effect DFn DFd          SSn       SSd           F             p
1   (Intercept)   1 190 478072.63040  9413.648 9649.160097 8.255836e-165
2         group   1 190     51.62391  9413.648    1.041949  3.086665e-01
3       section   1 190  69580.82094 10221.026 1293.447039  1.005008e-86
4 group:section   1 190    104.08195 10221.026    1.934793  1.658618e-01
  p<.05         ges
1     * 0.960549756
2       0.002622327
3     * 0.779918563
4       0.005272974
ezANOVA(data = wf_data_ss,
        dv = m_cd,
        wid = DODID,
        within = section,
        between = osuGroups,
        detailed = T
        )
$ANOVA
             Effect DFn DFd         SSn       SSd            F
1       (Intercept)   1 189 478072.6304  9257.957 9759.7916505
2         osuGroups   2 189    207.3157  9257.957    2.1161615
3           section   1 189  69580.8209 10217.821 1287.0429836
4 osuGroups:section   2 189    107.2868 10217.821    0.9922471
              p p<.05         ges
1 1.269402e-164     * 0.960856517
2  1.233394e-01       0.010532677
3  2.743715e-86     * 0.781310110
4  3.726655e-01       0.005478551

Modeling WF with MCA

wf_data_ss
# A tibble: 384 x 10
   DODID section     N  N.na  m_wf  m_cd group osuWorstInjury osuWIS
   <chr> <chr>   <int> <int> <dbl> <dbl> <chr>          <dbl> <fct> 
 1 1228  SC         15     0 490.   55.1 Cont…             NA Contr…
 2 1228  UC         15     1 113.   23.9 Cont…             NA Contr…
 3 1240  SC         15     0 449.   53.4 Cont…             NA Contr…
 4 1240  UC         15     0 237.   33.5 Cont…             NA Contr…
 5 1267  SC         15     0 501.   49.2 Cont…             NA Contr…
 6 1267  UC         15     0  81.5  21.4 Cont…             NA Contr…
 7 1359  SC         15     1 484.   43.2 Cont…             NA Contr…
 8 1359  UC         15     0  84.0  18.6 Cont…             NA Contr…
 9 1447  SC         15     0 424.   46.2 Cont…             NA Contr…
10 1447  UC         15     0 140.   27.2 Cont…             NA Contr…
# ... with 374 more rows, and 1 more variable: osuGroups <fct>
wf_mod_data <- wf_data_ss %>% 
  mutate(secc = ifelse(section == "SC", 1, -1)) %>% # dependency
  group_by(DODID, osuWIS) %>%
  # introduces dependency of HSCT section
  summarise(w0_wf = mean(m_wf),     # between-ss effect
            w1_wf = sum(m_wf*secc), # within-ss effect of HSCT section 
            w0_cd = mean(m_cd),     # between-ss effect
            w1_cd = sum(m_cd*secc)  # within-ss effect of HSCT section) %>%
            ) %>%
  ungroup() %>%
  left_join(., ccodes_merge, by = "osuWIS") %>% # between-subjects ccodes
  left_join(., ages, by = "DODID") %>%
  mutate(age_c = scale(age, center = TRUE, scale = FALSE), # mean centers age
         iq_c = scale(wasi.full2IQ, center = TRUE, scale = FALSE),
         osuWIS = factor(osuWIS, 
                         levels = c("Control", "2", "3", "4", "5"),
                         labels = c("Control","Possible", "Mild", "Mod/Severe", "Mod/Severe") # converts to factor
                        )
         )

# imports these contrasts to factor
contrasts(wf_mod_data$osuWIS) <- ccodes_mat


# Word Frequency Age models ----
summary(lm(w0_wf ~ 1 + osuWIS*age_c, data = wf_mod_data))

Call:
lm(formula = w0_wf ~ 1 + osuWIS * age_c, data = wf_mod_data)

Residuals:
    Min      1Q  Median      3Q     Max 
-137.80  -55.93  -22.97   26.48  977.11 

Coefficients:
               Estimate Std. Error t value Pr(>|t|)    
(Intercept)    298.1157    10.4929  28.411   <2e-16 ***
osuWISc1        -3.0434     4.7389  -0.642    0.522    
osuWISc2        16.7723    10.6936   1.568    0.118    
osuWISc3       -16.2367    13.5205  -1.201    0.231    
age_c           -0.5931     0.7883  -0.752    0.453    
osuWISc1:age_c  -0.2750     0.3452  -0.797    0.427    
osuWISc2:age_c  -0.6626     0.8214  -0.807    0.421    
osuWISc3:age_c   0.1709     0.9947   0.172    0.864    
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Residual standard error: 118.3 on 184 degrees of freedom
Multiple R-squared:  0.03941,   Adjusted R-squared:  0.002866 
F-statistic: 1.078 on 7 and 184 DF,  p-value: 0.3789
summary(lm(w1_wf ~ 1 + osuWIS*age_c, data = wf_mod_data))

Call:
lm(formula = w1_wf ~ 1 + osuWIS * age_c, data = wf_mod_data)

Residuals:
    Min      1Q  Median      3Q     Max 
-969.89  -57.16   55.53  108.05  442.37 

Coefficients:
               Estimate Std. Error t value Pr(>|t|)    
(Intercept)    284.7466    16.8134  16.936   <2e-16 ***
osuWISc1         6.8364     7.5934   0.900    0.369    
osuWISc2       -10.9286    17.1350  -0.638    0.524    
osuWISc3        -8.6068    21.6648  -0.397    0.692    
age_c            1.4867     1.2632   1.177    0.241    
osuWISc1:age_c   0.4782     0.5532   0.865    0.388    
osuWISc2:age_c   0.1232     1.3162   0.094    0.926    
osuWISc3:age_c   1.8098     1.5939   1.135    0.258    
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Residual standard error: 189.5 on 184 degrees of freedom
Multiple R-squared:  0.03143,   Adjusted R-squared:  -0.005417 
F-statistic: 0.853 on 7 and 184 DF,  p-value: 0.545
# Word Frequency IQ Models ----
summary(lm(w0_wf ~ 1 + osuWIS*iq_c, data = wf_mod_data))

Call:
lm(formula = w0_wf ~ 1 + osuWIS * iq_c, data = wf_mod_data)

Residuals:
    Min      1Q  Median      3Q     Max 
-179.28  -49.01  -16.83   24.64  890.44 

Coefficients:
              Estimate Std. Error t value Pr(>|t|)    
(Intercept)   297.9337     9.7441  30.576  < 2e-16 ***
osuWISc1       -0.9736     4.3396  -0.224  0.82274    
osuWISc2       14.3941     9.9197   1.451  0.14847    
osuWISc3      -22.7911    12.7080  -1.793  0.07454 .  
iq_c           -2.3466     0.7460  -3.146  0.00193 ** 
osuWISc1:iq_c  -0.2494     0.3384  -0.737  0.46209    
osuWISc2:iq_c  -0.4072     0.7822  -0.521  0.60332    
osuWISc3:iq_c  -0.1571     0.9034  -0.174  0.86217    
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Residual standard error: 113.8 on 184 degrees of freedom
Multiple R-squared:   0.11, Adjusted R-squared:  0.07614 
F-statistic: 3.249 on 7 and 184 DF,  p-value: 0.002828
summary(lm(w1_wf ~ 1 + osuWIS*iq_c, data = wf_mod_data))

Call:
lm(formula = w1_wf ~ 1 + osuWIS * iq_c, data = wf_mod_data)

Residuals:
    Min      1Q  Median      3Q     Max 
-969.39  -49.06   40.53  101.57  387.55 

Coefficients:
              Estimate Std. Error t value Pr(>|t|)    
(Intercept)   290.7003    15.8620  18.327   <2e-16 ***
osuWISc1        0.3354     7.0642   0.047   0.9622    
osuWISc2       -8.4461    16.1478  -0.523   0.6016    
osuWISc3        2.4798    20.6867   0.120   0.9047    
iq_c            2.7536     1.2143   2.268   0.0245 *  
osuWISc1:iq_c   0.2116     0.5509   0.384   0.7013    
osuWISc2:iq_c  -1.1948     1.2733  -0.938   0.3493    
osuWISc3:iq_c   3.5894     1.4706   2.441   0.0156 *  
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Residual standard error: 185.3 on 184 degrees of freedom
Multiple R-squared:  0.07383,   Adjusted R-squared:  0.0386 
F-statistic: 2.095 on 7 and 184 DF,  p-value: 0.04605
# Visualizing word frequency IQ model results
ggplot(wf_mod_data, aes(iq_c, w0_wf)) +
  geom_point() +
  geom_smooth(method = "lm", se = TRUE) +
  labs(x = "IQ (centered)", "Average WF Rating (w0)") +
  theme_minimal()

w0_wf_c3_sum <- wf_mod_data %>%
  group_by(c3) %>%
  summarise(m = mean(w0_wf), sd = sd(w0_wf), n = n(), sem = sd/sqrt(n)) %>%
  ungroup()

ggplot(w0_wf_c3_sum %>% filter(c3 != 0), aes(factor(c3), m)) +
  geom_point(data = wf_mod_data %>% filter(c3 != 0),
             aes(y = w0_wf), shape = 1, position = pj
             ) +
  geom_point() +
  geom_errorbar(aes(ymin = m-sem, ymax = m+sem), width = .05) +
  labs(x = "Group", y = "Average WF Rating (w0)") +
  theme_minimal()

ggplot(wf_mod_data, aes(iq_c, w1_wf)) +
  geom_point() +
  geom_smooth(method = "lm", se = TRUE) +
  labs(x = "IQ (centered)", y = "Within-ss WF Effect (w1)") +
  theme_minimal()

ggplot(wf_mod_data %>% filter(c3 != 0), 
       aes(wasi.full2IQ, w1_wf, group = factor(c3), color = factor(c3))
       ) +
  geom_point() +
  geom_smooth(method = "lm", se = FALSE) +
  labs(x = "IQ", y = "Within-ss WF Effect (w1)") +
  theme_minimal()

# Contextual Diversity Age models ----
summary(lm(w0_cd ~ 1 + osuWIS*age_c, data = wf_mod_data))

Call:
lm(formula = w0_cd ~ 1 + osuWIS * age_c, data = wf_mod_data)

Residuals:
     Min       1Q   Median       3Q      Max 
-12.4242  -2.8926  -0.0648   3.3520  13.1141 

Coefficients:
               Estimate Std. Error t value Pr(>|t|)    
(Intercept)    35.03971    0.44048  79.549   <2e-16 ***
osuWISc1       -0.08980    0.19893  -0.451   0.6522    
osuWISc2        0.36718    0.44890   0.818   0.4144    
osuWISc3       -1.13946    0.56757  -2.008   0.0461 *  
age_c           0.02644    0.03309   0.799   0.4253    
osuWISc1:age_c -0.01611    0.01449  -1.112   0.2677    
osuWISc2:age_c -0.01688    0.03448  -0.490   0.6251    
osuWISc3:age_c -0.00692    0.04176  -0.166   0.8686    
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Residual standard error: 4.964 on 184 degrees of freedom
Multiple R-squared:  0.04179,   Adjusted R-squared:  0.005334 
F-statistic: 1.146 on 7 and 184 DF,  p-value: 0.3361
summary(lm(w1_cd ~ 1 + osuWIS*age_c, data = wf_mod_data))

Call:
lm(formula = w1_cd ~ 1 + osuWIS * age_c, data = wf_mod_data)

Residuals:
    Min      1Q  Median      3Q     Max 
-34.687  -6.297   0.577   7.685  22.807 

Coefficients:
               Estimate Std. Error t value Pr(>|t|)    
(Intercept)    27.14774    0.91949  29.525   <2e-16 ***
osuWISc1       -0.14387    0.41527  -0.346    0.729    
osuWISc2        0.96822    0.93708   1.033    0.303    
osuWISc3       -0.23204    1.18481  -0.196    0.845    
age_c           0.08040    0.06908   1.164    0.246    
osuWISc1:age_c  0.03477    0.03025   1.149    0.252    
osuWISc2:age_c -0.03334    0.07198  -0.463    0.644    
osuWISc3:age_c  0.03794    0.08716   0.435    0.664    
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Residual standard error: 10.36 on 184 degrees of freedom
Multiple R-squared:  0.04304,   Adjusted R-squared:  0.006638 
F-statistic: 1.182 on 7 and 184 DF,  p-value: 0.3149
# Contextual Diversity IQ models ----
summary(lm(w0_cd ~ 1 + osuWIS*iq_c, data = wf_mod_data))

Call:
lm(formula = w0_cd ~ 1 + osuWIS * iq_c, data = wf_mod_data)

Residuals:
     Min       1Q   Median       3Q      Max 
-13.5457  -3.2392   0.2192   3.0957  10.0139 

Coefficients:
              Estimate Std. Error t value Pr(>|t|)    
(Intercept)   35.14364    0.40871  85.986  < 2e-16 ***
osuWISc1      -0.10419    0.18202  -0.572  0.56773    
osuWISc2       0.34515    0.41608   0.830  0.40788    
osuWISc3      -1.41155    0.53303  -2.648  0.00880 ** 
iq_c          -0.10144    0.03129  -3.242  0.00141 ** 
osuWISc1:iq_c -0.01054    0.01419  -0.743  0.45859    
osuWISc2:iq_c  0.01759    0.03281   0.536  0.59247    
osuWISc3:iq_c -0.00592    0.03789  -0.156  0.87603    
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Residual standard error: 4.775 on 184 degrees of freedom
Multiple R-squared:  0.1136,    Adjusted R-squared:  0.07991 
F-statistic:  3.37 on 7 and 184 DF,  p-value: 0.002089
summary(lm(w1_cd ~ 1 + osuWIS*iq_c, data = wf_mod_data))

Call:
lm(formula = w1_cd ~ 1 + osuWIS * iq_c, data = wf_mod_data)

Residuals:
    Min      1Q  Median      3Q     Max 
-34.716  -6.255   0.337   7.012  24.220 

Coefficients:
              Estimate Std. Error t value Pr(>|t|)    
(Intercept)   27.26963    0.84565  32.247  < 2e-16 ***
osuWISc1      -0.45738    0.37661  -1.214    0.226    
osuWISc2       0.76574    0.86089   0.889    0.375    
osuWISc3       0.39125    1.10287   0.355    0.723    
iq_c           0.26652    0.06474   4.117 5.79e-05 ***
osuWISc1:iq_c  0.01606    0.02937   0.547    0.585    
osuWISc2:iq_c -0.09919    0.06789  -1.461    0.146    
osuWISc3:iq_c  0.06644    0.07840   0.847    0.398    
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Residual standard error: 9.879 on 184 degrees of freedom
Multiple R-squared:  0.1304,    Adjusted R-squared:  0.0973 
F-statistic: 3.941 on 7 and 184 DF,  p-value: 0.0004937
ggplot(wf_mod_data, aes(wasi.full2IQ, w0_cd)) +
  geom_point() +
  geom_smooth(method = "lm", se = TRUE) +
  labs(x = "IQ", y = "Average CD Rating (w0)") +
  theme_minimal()

w0_cd_c3_sum <- wf_mod_data %>%
  group_by(c3) %>%
  summarise(m = mean(w0_cd), sd = sd(w0_cd), n = n(), sem = sd/sqrt(n)) %>%
  ungroup()

ggplot(w0_cd_c3_sum %>% filter(c3 != 0), aes(factor(c3), m)) +
  geom_point(data = wf_mod_data %>% filter(c3 != 0),
             aes(y = w0_cd), shape = 1, position = pj
             ) +
  geom_point() +
  geom_errorbar(aes(ymin = m-sem, ymax = m+sem), width = .05) +
  labs(x = "Group", y = "Average CD Rating (w0)") +
  theme_minimal()

ggplot(wf_mod_data, aes(wasi.full2IQ, w1_cd)) +
  geom_point() +
  geom_smooth(method = "lm", se = TRUE) +
  labs(x = "IQ", y = "Within-ss CD effect (w1)") +
  theme_minimal()